November 21, 2024
# Independent variables
nes |> group_by(better_worse_past_econ) |> summarize(n=n()) |> mutate(pct=100*n/sum(n))# A tibble: 6 × 3
better_worse_past_econ n pct
<dbl+lbl> <int> <dbl>
1 1 [Much better] 249 5.83
2 2 [Somwht better] 951 22.3
3 3 [Abt same] 1806 42.3
4 4 [Somwht worse] 670 15.7
5 5 [Much worse] 576 13.5
6 NA 19 0.445
# A tibble: 8 × 3
partyid7 n pct
<dbl+lbl> <int> <dbl>
1 1 [StrngDem] 890 20.8
2 2 [WkDem] 560 13.1
3 3 [IndDem] 490 11.5
4 4 [Indep] 579 13.6
5 5 [IndRep] 500 11.7
6 6 [WkRep] 508 11.9
7 7 [StrngRep] 721 16.9
8 NA 23 0.539
# Independent variables
nes |> group_by(Relig_attend) |> summarize(n=n()) |> mutate(pct=100*n/sum(n))# A tibble: 7 × 3
Relig_attend n pct
<dbl+lbl> <int> <dbl>
1 0 [Never] 1749 41.0
2 1 [Few tmes/yr] 704 16.5
3 2 [1-2/ mnth] 471 11.0
4 3 [Almst ev wk] 536 12.5
5 4 [Every wk] 426 9.97
6 5 [>Every wk] 362 8.48
7 NA 23 0.539
# A tibble: 4 × 3
Race3 n pct
<dbl+lbl> <int> <dbl>
1 1 [White] 3038 71.1
2 2 [Black] 398 9.32
3 3 [Hispanic] 450 10.5
4 NA 385 9.01
# Logit model using "glm"
vote_l <- glm(Clinton_vote ~ better_worse_past_econ +
partyid7 + Relig_attend + as.factor(Race3),
family=binomial(link=logit), data=nes)
summary(vote_l, digits=4)
Call:
glm(formula = Clinton_vote ~ better_worse_past_econ + partyid7 +
Relig_attend + as.factor(Race3), family = binomial(link = logit),
data = nes)
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 7.40859 0.35695 20.756 < 2e-16 ***
better_worse_past_econ -1.08213 0.09016 -12.003 < 2e-16 ***
partyid7 -1.02725 0.04438 -23.146 < 2e-16 ***
Relig_attend -0.20739 0.04616 -4.493 7.03e-06 ***
as.factor(Race3)2 2.29559 0.40660 5.646 1.64e-08 ***
as.factor(Race3)3 1.36818 0.27240 5.023 5.10e-07 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 3340.5 on 2410 degrees of freedom
Residual deviance: 1203.7 on 2405 degrees of freedom
(1860 observations deleted due to missingness)
AIC: 1215.7
Number of Fisher Scoring iterations: 6
# Goodness of fit: Proportional reduction in error (PRE)
# First, save "fitted values" (predicted prob(Clinton) for each observation);
# generate whether observation is "correctly predicted" or not (using .5 threshold).
# Using the "fitted" command requires removing all missing data.
# Note I create new data object, "nes_fit"
nes_fit <- nes |>
drop_na(Clinton_vote, better_worse_past_econ, partyid7, Relig_attend, Race3) |>
mutate(prob=fitted(vote_l),
correct=ifelse(prob>.5 & Clinton_vote==1, 1, 0),
correct=ifelse(prob<.5 & Clinton_vote==0, 1, correct))# Second, generate three core measures -- PMC, PCP, and PRE -- from the "nes_fit" object
# NOTE: if "1" is the modal category of DV (which it is in this case), use code below for "PMC": PMC=mean(Clinton_vote)
# If "0" is the modal category of DV, use: PMC=1-mean(Clinton_vote)
nes_fit |> summarize(PMC=mean(Clinton_vote), PCP=mean(correct)) |>
mutate(PRE = (PCP-PMC) / (1-PMC) )# A tibble: 1 × 3
PMC PCP PRE
<dbl> <dbl> <dbl>
1 0.514 0.904 0.803
## ECONOMIC PERCEPTIONS
## AVG CASE APPROACH
econ_ac_l <- predictions(
vote_l,
by = "better_worse_past_econ",
numderiv = "richardson",
newdata = datagrid(better_worse_past_econ = 1:5,
grid_type = "counterfactual"))
head(econ_ac_l)
better_worse_past_econ Estimate Pr(>|z|) S 2.5 % 97.5 %
1 0.915 < 0.001 102.4 0.8783 0.941
2 0.785 < 0.001 80.1 0.7401 0.823
3 0.552 0.00905 6.8 0.5131 0.591
4 0.295 < 0.001 44.0 0.2499 0.344
5 0.124 < 0.001 78.7 0.0887 0.171
Columns: better_worse_past_econ, estimate, p.value, s.value, conf.low, conf.high
Type: invlink(link)
## OBSERVED VALUE APPROACH
econ_ov_l <- predictions(
vote_l,
type = "response",
by = "better_worse_past_econ",
newdata = datagrid(better_worse_past_econ = 1:5,
grid_type = "counterfactual"))
head(econ_ov_l)
better_worse_past_econ Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %
1 0.726 0.0203 35.7 <0.001 924.1 0.686 0.765
2 0.621 0.0118 52.4 <0.001 Inf 0.598 0.644
3 0.520 0.0069 75.5 <0.001 Inf 0.507 0.534
4 0.419 0.0114 36.8 <0.001 983.4 0.397 0.441
5 0.311 0.0201 15.5 <0.001 178.1 0.272 0.351
Columns: better_worse_past_econ, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high
Type: response
## RACE
## AVG CASE
race_ac_l <- predictions(
vote_l,
by = "Race3",
numderiv = "richardson",
newdata = datagrid(Race3 = 1:3,
grid_type = "counterfactual"))
head(race_ac_l)
Race3 Estimate Pr(>|z|) S 2.5 % 97.5 %
1 0.451 0.0163 5.9 0.412 0.491
2 0.891 <0.001 22.9 0.789 0.947
3 0.764 <0.001 17.3 0.660 0.843
Columns: Race3, estimate, p.value, s.value, conf.low, conf.high
Type: invlink(link)
## OBSERVED VALUE
race_ov_l <- predictions(
vote_l,
type = "response",
by = "Race3",
numderiv = "richardson",
newdata = datagrid(Race3 = 1:3,
grid_type = "counterfactual"))
head(race_ov_l)
Race3 Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %
1 0.493 0.00654 75.4 <0.001 Inf 0.480 0.506
2 0.683 0.03475 19.7 <0.001 283.3 0.615 0.751
3 0.604 0.02151 28.1 <0.001 574.5 0.562 0.646
Columns: Race3, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high
Type: response
# Compare AC to OV: Econ Perceptions
ggplot(econ_ac_l, aes(x=better_worse_past_econ, y=estimate)) +
geom_line(color="black") +
geom_line(data=econ_ov_l, aes(x=better_worse_past_econ, y=estimate), color="dodgerblue") +
labs(x="Economic perceptions", y="Predicted Prob(Clinton Vote)") +
geom_text(x=2, y=.86, label="Avg. case", color="black") +
geom_text(x=1.5, y=.6, label="Obs. value", color="dodgerblue") +
scale_x_continuous(breaks=c(1:5), labels=c("Better", "2", "3", "4", "Worse")) +
scale_y_continuous(limits = c(.1, 1), breaks = seq(.1, 1, by = .10))# Compare AC to OV: Race
ggplot(race_ac_l, aes(x=as.numeric(Race3), y=estimate)) +
geom_line(color="black") +
geom_line(data=race_ov_l, aes(x=as.numeric(Race3), y=estimate), color="dodgerblue") +
labs(x="", y="Predicted Prob(Clinton Vote)") +
geom_text(x=2, y=.93, label="Avg. case", color="black") +
geom_text(x=1.5, y=.5, label="Obs. value", color="dodgerblue") +
scale_x_continuous(breaks=c(1:3), labels=c("White", "Black", "Hispanic")) +
scale_y_continuous(limits = c(.1, 1), breaks = seq(.1, 1, by = .10))# Probit
vote_p <- glm(Clinton_vote ~ better_worse_past_econ +
partyid7 + Relig_attend + as.factor(Race3),
family=binomial(link=probit), data=nes)
summary(vote_p, digits=4)
Call:
glm(formula = Clinton_vote ~ better_worse_past_econ + partyid7 +
Relig_attend + as.factor(Race3), family = binomial(link = probit),
data = nes)
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 4.07811 0.17785 22.930 < 2e-16 ***
better_worse_past_econ -0.58227 0.04767 -12.215 < 2e-16 ***
partyid7 -0.57227 0.02225 -25.716 < 2e-16 ***
Relig_attend -0.11744 0.02476 -4.743 2.10e-06 ***
as.factor(Race3)2 1.19918 0.20845 5.753 8.77e-09 ***
as.factor(Race3)3 0.74229 0.14804 5.014 5.32e-07 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 3340.5 on 2410 degrees of freedom
Residual deviance: 1205.1 on 2405 degrees of freedom
(1860 observations deleted due to missingness)
AIC: 1217.1
Number of Fisher Scoring iterations: 6
# ECON PERCEPTIONS
# AVERAGE CASE APPROACH
econ_ac_p <- predictions(
vote_p,
by = "better_worse_past_econ",
numderiv = "richardson",
newdata = datagrid(better_worse_past_econ = 1:5,
grid_type = "counterfactual"))
head(econ_ac_p)
better_worse_past_econ Estimate Pr(>|z|) S 2.5 % 97.5 %
1 0.900 < 0.001 106.7 0.858 0.932
2 0.758 < 0.001 83.8 0.715 0.796
3 0.546 0.00646 7.3 0.513 0.579
4 0.321 < 0.001 44.4 0.279 0.365
5 0.147 < 0.001 80.5 0.106 0.198
Columns: better_worse_past_econ, estimate, p.value, s.value, conf.low, conf.high
Type: invlink(link)
## OBSERVED VALUE APPROACH
econ_ov_p <- predictions(
vote_p,
type = "response",
by = "better_worse_past_econ",
newdata = datagrid(better_worse_past_econ = 1:5,
grid_type = "counterfactual"))
head(econ_ov_p)
better_worse_past_econ Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %
1 0.722 0.01948 37.1 <0.001 997.8 0.684 0.761
2 0.621 0.01174 52.9 <0.001 Inf 0.598 0.644
3 0.521 0.00692 75.3 <0.001 Inf 0.508 0.535
4 0.420 0.01130 37.2 <0.001 1004.0 0.398 0.443
5 0.316 0.01933 16.4 <0.001 197.6 0.278 0.354
Columns: better_worse_past_econ, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high
Type: response
## RACE
## AVG CASE
race_ac_p <- predictions(
vote_p,
by = "Race3",
numderiv = "richardson",
newdata = datagrid(Race3 = 1:3,
grid_type = "counterfactual"))
head(race_ac_p)
Race3 Estimate Pr(>|z|) S 2.5 % 97.5 %
1 0.461 0.0243 5.4 0.427 0.495
2 0.865 <0.001 24.0 0.759 0.933
3 0.740 <0.001 17.6 0.643 0.821
Columns: Race3, estimate, p.value, s.value, conf.low, conf.high
Type: invlink(link)
## OBSERVED VALUE
race_ov_p <- predictions(
vote_p,
type = "response",
by = "Race3",
numderiv = "richardson",
newdata = datagrid(Race3 = 1:3,
grid_type = "counterfactual"))
head(race_ov_p)
Race3 Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %
1 0.494 0.00658 75.1 <0.001 Inf 0.481 0.507
2 0.675 0.03183 21.2 <0.001 329.4 0.613 0.738
3 0.606 0.02143 28.3 <0.001 581.5 0.564 0.648
Columns: Race3, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high
Type: response
# Compare logit v. probit probs (obs. value): Econ
ggplot(econ_ov_l, aes(x=better_worse_past_econ, y=estimate)) +
geom_line(color="black") +
geom_line(data=econ_ov_p, aes(x=better_worse_past_econ, y=estimate), color="dodgerblue") +
labs(x="Economic perceptions", y="Predicted Prob(Clinton Vote)") +
geom_text(x=2, y=.7, label="Logit", color="black") +
geom_text(x=1.5, y=.6, label="Probit", color="dodgerblue") +
scale_x_continuous(breaks=c(1:5), labels=c("Better", "2", "3", "4", "Worse")) +
scale_y_continuous(limits = c(.1, 1), breaks = seq(.1, 1, by = .10))# Compare logit v. probit probs (obs. value): Race
ggplot(race_ov_l, aes(x=as.numeric(Race3), y=estimate)) +
geom_line(color="black") +
geom_line(data=race_ov_p, aes(x=as.numeric(Race3), y=estimate), color="dodgerblue") +
labs(x="", y="Predicted Prob(Clinton Vote)") +
geom_text(x=2, y=.75, label="Logit", color="black") +
geom_text(x=1.5, y=.5, label="Probit", color="dodgerblue") +
scale_x_continuous(breaks=c(1:3), labels=c("White", "Black", "Hispanic")) +
scale_y_continuous(limits = c(.1, 1), breaks = seq(.1, 1, by = .10))# LPM with robust standard errors; use lm_robust from estimatr package
vote_lpm <- lm_robust(Clinton_vote ~ better_worse_past_econ +
partyid7 + Relig_attend + as.factor(Race3), data=nes,
se_type="stata")
summary(vote_lpm)
Call:
lm_robust(formula = Clinton_vote ~ better_worse_past_econ + partyid7 +
Relig_attend + as.factor(Race3), data = nes, se_type = "stata")
Standard error type: HC1
Coefficients:
Estimate Std. Error t value Pr(>|t|) CI Lower
(Intercept) 1.32678 0.016381 80.993 0.000e+00 1.29466
better_worse_past_econ -0.08695 0.006912 -12.580 3.420e-35 -0.10051
partyid7 -0.14105 0.003353 -42.064 2.677e-290 -0.14762
Relig_attend -0.01670 0.003465 -4.818 1.537e-06 -0.02349
as.factor(Race3)2 0.12509 0.017849 7.008 3.122e-12 0.09009
as.factor(Race3)3 0.11881 0.020582 5.773 8.805e-09 0.07845
CI Upper DF
(Intercept) 1.358907 2405
better_worse_past_econ -0.073398 2405
partyid7 -0.134473 2405
Relig_attend -0.009902 2405
as.factor(Race3)2 0.160087 2405
as.factor(Race3)3 0.159175 2405
Multiple R-squared: 0.6618 , Adjusted R-squared: 0.6611
F-statistic: 2420 on 5 and 2405 DF, p-value: < 2.2e-16
# Pred. probs: Econ
econ_lpm <- predictions(
vote_lpm,
by = "better_worse_past_econ",
df = insight::get_df(vote_lpm),
numderiv = "richardson",
newdata = datagrid(better_worse_past_econ = 1:5,
grid_type="counterfactual"))
head(econ_lpm)
better_worse_past_econ Estimate Std. Error t Pr(>|t|) S 2.5 % 97.5 %
1 0.691 0.01505 45.9 <0.001 Inf 0.662 0.721
2 0.605 0.00911 66.3 <0.001 Inf 0.587 0.622
3 0.518 0.00592 87.4 <0.001 Inf 0.506 0.529
4 0.431 0.00909 47.4 <0.001 Inf 0.413 0.448
5 0.344 0.01503 22.9 <0.001 346.2 0.314 0.373
Df
2405
2405
2405
2405
2405
Columns: better_worse_past_econ, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high, df
Type: response
# Compare logit (obs. value) and LPM
ggplot(econ_ov_l, aes(x=better_worse_past_econ, y=estimate)) +
geom_line(color="black") +
geom_line(data=econ_lpm, aes(x=better_worse_past_econ, y=estimate), color="dodgerblue") +
labs(x="Economic perceptions", y="Predicted Prob(Clinton Vote)") +
geom_text(x=2, y=.7, label="Logit (Obs. Value)", color="black") +
geom_text(x=1.5, y=.6, label="LPM", color="dodgerblue") +
scale_x_continuous(breaks=c(1:5), labels=c("Better", "2", "3", "4", "Worse")) +
scale_y_continuous(limits = c(.1, 1), breaks = seq(.1, 1, by = .10))# Compare probit (OV) and LPM
ggplot(econ_ov_p, aes(x=better_worse_past_econ, y=estimate)) +
geom_line(color="black") +
geom_line(data=econ_lpm, aes(x=better_worse_past_econ, y=estimate), color="dodgerblue") +
labs(x="Economic perceptions", y="Predicted Prob(Clinton Vote)") +
geom_text(x=2, y=.7, label="Probit (Obs. Value)", color="black") +
geom_text(x=1.5, y=.6, label="LPM", color="dodgerblue") +
scale_x_continuous(breaks=c(1:5), labels=c("Better", "2", "3", "4", "Worse")) +
scale_y_continuous(limits = c(.1, 1), breaks = seq(.1, 1, by = .10))